from .base_adapter import BaseAdapter
from bs4 import BeautifulSoup
import re

class CSDNAdapter(BaseAdapter):
    """Adapter that recognises blog.csdn.net URLs and parses their HTML.

    ``parse`` returns a dict with the article title, the article body as
    plain text, and the text of every highlighted code block.
    """

    # Compiled once at class creation; anchored at the start of the URL.
    _URL_PATTERN = re.compile(r"^https?://blog\.csdn\.net/")

    def detect(self, url):
        """Return a truthy match object if *url* starts with a CSDN blog prefix.

        Returns ``None`` (falsy) when the URL does not match or is not a
        string, so callers can use the result directly in a boolean test.
        """
        if not isinstance(url, str):
            # re.match would raise TypeError on None / non-string input.
            return None
        return self._URL_PATTERN.match(url)

    def parse(self, html) -> dict:
        """Parse *html* and return ``title``, ``content`` and ``code_blocks``.

        Missing title or content elements yield empty strings rather than
        raising, so a partially matching page still returns what was found.
        """
        soup = BeautifulSoup(html, 'lxml')
        return {
            'title': self._extract_title(soup),
            'content': self._extract_content(soup),
            'code_blocks': self._extract_code(soup),
        }

    def _extract_title(self, soup) -> str:
        """Return the stripped text of ``<h1 class="title-article">``, or ''."""
        heading = soup.find('h1', class_='title-article')
        if heading is None:
            # find() returns None when no element matches.
            return ''
        return heading.get_text().strip()

    def _extract_content(self, soup) -> str:
        """Return the text of ``<div id="article_content">``, or ''.

        Text fragments are joined with newlines.
        """
        body = soup.find('div', id='article_content')
        if body is None:
            return ''
        return body.get_text(separator='\n')

    def _extract_code(self, soup) -> list:
        """Return the text of every ``<code class="hljs">`` element, in order."""
        return [code.get_text() for code in soup.find_all('code', class_='hljs')]